There are 231 subjects with patients’ age and the amount of exercise expressed as estimated hours per week with 2 groups (control and patient).
data("Blackmore")
skimr::skim(Blackmore)
| Name | Blackmore |
| Number of rows | 945 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| factor | 2 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| subject | 0 | 1 | FALSE | 231 | 100: 5, 101: 5, 105: 5, 106: 5 |
| group | 0 | 1 | FALSE | 2 | pat: 586, con: 359 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 11.44 | 2.77 | 8 | 10.0 | 12.00 | 14.00 | 17.92 | ▇▇▇▆▃ |
| exercise | 0 | 1 | 2.53 | 3.50 | 0 | 0.4 | 1.33 | 3.04 | 29.96 | ▇▁▁▁▁ |
control <- Blackmore[Blackmore$group == "control",]
skimr::skim(control)
| Name | control |
| Number of rows | 359 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| factor | 2 |
| numeric | 2 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| subject | 0 | 1 | FALSE | 93 | 200: 5, 205: 5, 207: 5, 212: 5 |
| group | 0 | 1 | FALSE | 1 | con: 359, pat: 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| age | 0 | 1 | 11.26 | 2.70 | 8 | 8.00 | 10.00 | 13.50 | 17.92 | ▇▇▇▅▂ |
| exercise | 0 | 1 | 1.64 | 1.81 | 0 | 0.43 | 1.05 | 2.15 | 11.54 | ▇▂▁▁▁ |
length(unique(control$subject))
## [1] 93
Blackmore$log.exercise <- log(Blackmore$exercise + 5/60, 2)
attach(Blackmore)
con <- sample(unique(subject[group == "control"]), 20)
con.20 <- Blackmore[is.element(subject, con),]
ggplot(con.20, aes(I(age-8), log.exercise)) + geom_point() + facet_wrap(.~subject)
pat <- sample(unique(subject[group == "control"]), 20)
pat.20 <- Blackmore[is.element(subject, pat),]
ggplot(pat.20, aes(I(age-8), log.exercise)) +
geom_point() + facet_wrap(.~subject) +
geom_smooth(method = "lm", se = FALSE)
detach(Blackmore)
summary(I(Blackmore$age - 8))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.000 2.000 4.000 3.442 6.000 9.920
fit <- lmer(log.exercise ~ I(age-8)*group + (1 |subject), data = Blackmore)
summary(fit)
## Linear mixed model fit by REML ['lmerMod']
## Formula: log.exercise ~ I(age - 8) * group + (1 | subject)
## Data: Blackmore
##
## REML criterion at convergence: 3632.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2142 -0.4735 0.1269 0.5643 2.7214
##
## Random effects:
## Groups Name Variance Std.Dev.
## subject (Intercept) 1.875 1.369
## Residual 1.807 1.344
## Number of obs: 945, groups: subject, 231
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) -0.28322 0.18066 -1.568
## I(age - 8) 0.06901 0.02717 2.540
## grouppatient -0.33354 0.23306 -1.431
## I(age - 8):grouppatient 0.22816 0.03387 6.737
##
## Correlation of Fixed Effects:
## (Intr) I(g-8) grpptn
## I(age - 8) -0.471
## grouppatint -0.775 0.365
## I(g-8):grpp 0.378 -0.802 -0.473
vcov is the covariance matrix of the fixed-effect estimates which is consistent with the mathmatical expression \(V(\hat \beta) = (X^\top \Omega^{-1} X)^{-1}\) (solve(t(X) %*% Sigma_inv %*% X)).
N <- nrow(Blackmore)
subjects <- unique(Blackmore$subject)
y <- Blackmore$log.exercise
X <- model.matrix(~ I(age-8)*group, data = Blackmore)
beta <- fixef(fit)
Z <- model.matrix(~ subject-1, data = Blackmore)
u <- ranef(fit)$subject[,1]
q <- length(u)
vc <- as.data.frame(VarCorr(fit))
R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
G <- vc$vcov[vc$grp == 'subject'] * diag(q)
Sigma <- Z %*% G %*% t(Z) + R
Sigma_inv <- solve(Sigma)
P <- Sigma_inv - Sigma_inv %*% X %*% vcov(fit) %*% t(X) %*% Sigma_inv
varMargRes <- Sigma - X %*% vcov(fit) %*% t(X)
varCondRes <- R %*% P %*% R
varU <- G - G %*% t(Z) %*% P %*% Z %*% G
fit_bl <- Blackmore %>%
mutate(fitted = as.vector(X %*% beta),
resm = y - fitted,
resc = y - fitted - Z %*% u,
index = 1:n()) %>%
rowwise() %>%
mutate(resm_std = resm/sqrt(diag(varMargRes)[index]),
resc_std = resc/sqrt(diag(varCondRes)[index])) %>%
ungroup()
unit_bl <- expand_grid(subjects) %>%
mutate(Vi = map_dbl(subjects, ~{
ind <- Blackmore %>%
mutate(index = 1:n()) %>%
filter(subject == .x) %>%
pull(index)
mi <- length(ind)
Ei <- solve(expm::sqrtm(varMargRes[ind, ind])) %*% fit_bl$resm[ind]
sqrt(sum((diag(mi) - Ei %*% t(Ei))^2)) / mi
}),
Mi = as.vector(t(u) %*% solve(varU) %*% u),
index = 1:n())
ggplot(fit_bl, aes(fitted, resm_std)) +
geom_hline(yintercept = 0) +
geom_point()
ggplot(fit_bl, aes(index, resm_std)) + geom_point() + geom_hline(yintercept = 0)
ggplot(unit_bl, aes(index, Vi)) + geom_point()+ xlab("Unit indicies") + ylab("Modified Lesaffre-Verbeke index")
ggplot(fit_bl, aes(index, resc_std)) + geom_point() + geom_hline(yintercept = 0)
ggplot(fit_bl, aes(fitted, resc_std)) + geom_point() + geom_hline(yintercept = 0)
ggplot(fit_bl, aes(sample = resc_std)) +
geom_qq() + geom_qq_line(color = "red")
ggplot(unit_bl, aes(index, Mi)) + geom_point() + xlab("Unit index") + ylab("Manhalanobis distance")
ggplot(unit_bl, aes(sample = Mi)) +
geom_qq(distribution = stats::qchisq,
dparams = list(df = q)) +
geom_qq_line(color = "red",
distribution = stats::qchisq,
dparams = list(df = q))
lineup(null_permute("resm_std"), fit_bl) %>%
ggplot(aes(fitted, resm_std)) +
geom_point() +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
lineup(null_permute("resm_std"), fit_bl) %>%
ggplot(aes(index, resm_std)) +
geom_point() +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
lineup(null_permute("Vi"), unit_bl) %>%
ggplot(aes(index, Vi)) +
geom_point() +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
lineup(null_permute("resc_std"), fit_bl) %>%
ggplot(aes(index, resc_std)) +
geom_point() +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
lineup(null_permute("resc_std"), fit_bl) %>%
ggplot(aes(fitted, resc_std)) +
geom_point() +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
lineup(null_permute("resc_std"), fit_bl) %>%
ggplot(aes(sample = resc_std)) +
geom_qq() +
geom_qq_line(color = "red") +
facet_wrap(~.sample) +
xlab(NULL) +
ylab(NULL) +
theme(axis.text.y = element_blank(), axis.text.x = element_blank(),
axis.ticks.x = element_blank(), axis.ticks.y = element_blank())
Since the \(M_i\) are the same, there is no need to do that
lineup(null_permute("Mi"), unit_bl) %>%
ggplot(aes(index, Mi))+
geom_point()+
facet_wrap(~.sample)
lineup(null_permute("Mi"), unit_bl) %>%
ggplot(aes(sample = Mi)) +
geom_qq(distribution = stats::qchisq,
dparams = list(df = q)) +
geom_qq_line(color = "red",
distribution = stats::qchisq,
dparams = list(df = q)) +
facet_wrap(~.sample)
d <- lineup(null_permute("log.exercise"), Blackmore)
simdat <- map_dfr(1:20, function(i){
df <- d %>% filter(.sample == i)
m <- fit <- lmer(log.exercise ~ I(age-8)*group + (1 |subject), data = df)
N <- nrow(df)
subjects <- unique(df$subject)
y <- df$log.exercise
X <- model.matrix(~ I(age-8)*group, data = df)
beta <- fixef(m)
Z <- model.matrix(~ subject-1, data = df)
u <- ranef(m)$subject[,1]
q <- length(u)
vc <- as.data.frame(VarCorr(m))
R <- vc$vcov[vc$grp == 'Residual'] * diag(N)
G <- vc$vcov[vc$grp == 'subject'] * diag(q)
Sigma <- Z %*% G %*% t(Z) + R
Sigma_inv <- solve(Sigma)
P <- Sigma_inv - Sigma_inv %*% X %*% vcov(m) %*% t(X) %*% Sigma_inv
varMargRes <- Sigma - X %*% vcov(m) %*% t(X)
varCondRes <- R %*% P %*% R
varU <- G - G %*% t(Z) %*% P %*% Z %*% G
fitted = as.vector(X %*% beta)
resm = y - fitted
resc = y - fitted - Z %*% u
index = 1:nrow(df)
resm_std = resm / sqrt(diag(varMargRes)[index])
resc_std = resc / sqrt(diag(varCondRes)[index])
tibble::tibble(df, fitted, resm_std, resc_std, index)
})